*-------------------------------------------------------------------------------
*						Appendix II Tab 1 and Appendix II Fig 2
*-------------------------------------------------------------------------------

** Project folders used throughout this do-file
global Raw_data   "G:\project-finished\Descriptive\Data"
global App_data   "G:\project-finished\Descriptive\Appendix Data"
global Class_data "G:\project-finished\Descriptive\Classification"
global Work_lab   "G:\project-finished\Descriptive\Lab"
global Out_lab    "G:\project-finished\Descriptive\Out"

* Do not pause output at --more-- prompts
set more off

* Intermediate .dta files are written to (and later erased from) this folder
cd "$Work_lab"

* Close any log left open by an earlier run, then start a fresh one
capture log close
log using "$Out_lab\Appendix II Tab 1 and Appendix II Fig 2", replace


**------------------------------------------------------------------------------
* Step 1: Generate data
*         Column (1): CLDS 2014 ability indices by occupation
**------------------------------------------------------------------------------
* Builds tabledata1.dta: one row per occupation code (occ) with standardised
* reading, cooperation, interaction, social, cognitive and physical indices.

use "$App_data\CLDS\individual2014-clds.dta", clear

* Short working names for the questionnaire items. Only index6, index7_*,
* index8, index9 and index13-index15 feed the indices constructed below.
rename I3a1_10 index1_1
rename I3a3_10 index1_2
rename I7_3_8 index2
rename (I2_10 I2_11) (index3_1 index3_2)
rename (I3a1_12_1 I3a3_12_1) (index4_1 index4_2)
rename (I2_12_3 I2_12_4 I2_12_5 I2_12_6) (index5_1 index5_2 index5_3 index5_4)
rename I3a1_19_4 index5_5
rename I2_12_1 index6
rename (I3a1_20_a I3a1_20_f I3a1_20_b I3a1_20_c I3a1_20_i I3a1_20_h I3a1_20_e I3a1_20_d I3a1_20_g) (index7_1 index7_2 index7_3 index7_4 index7_5 index7_6 index7_7 index7_8 index7_9)
rename I7_3_7 index8
rename I7_4_3 index9
rename (I1_8_1 I1_8_2 I1_8_1_oth) (index10_1 index10_2 index10_3)
rename I10_9 index11
rename (I3a1_15 I3a1_15_2) (index12_1 index12_2)
rename I3a1_19_3 index13
rename I3a1_19_1 index14
rename I3a1_19_2 index15
rename IID2014 id
rename I3a_7_0 employ
rename I3a_7_code_csco15 occ

keep id employ occ index*

* Keep respondents with an occupation code and drop employ==2.
* missing() also catches extended missing codes (.a-.z), which occ==. lets
* through as spurious occupation groups.
drop if missing(occ) | employ == 2
drop employ
order id occ
compress

* Reverse-code the raw items so that larger values mean more of the ability.
* NOTE(review): -8 is treated as a non-substantive code for index7_*, index8
* and (for consistency) index9; confirm against the CLDS codebook whether
* index6 and index13-index15 can also carry -8.

* Reading abilities
generate index_reading = 4 - index6 if !missing(index6)

* Interpersonal abilities (nine items)
forvalues i = 1/9 {
	generate index_social`i' = 5 - index7_`i' if index7_`i' != -8
}

* Cooperation abilities
generate index_cooperate = 6 - index8 if index8 != -8

* Interaction abilities (guarded against -8 like index8 above)
generate index_interact = 6 - index9 if index9 != -8

* Cognitive abilities
generate index_cognitive = 5 - index13 if !missing(index13)

* Physical abilities
generate index_physical1 = 5 - index14 if !missing(index14)
generate index_physical2 = 5 - index15 if !missing(index15)

* Average every ingredient within occupation
keep occ index_reading index_social* index_cooperate index_interact index_cognitive index_physical*
collapse (mean) index*, by(occ)

* Fill occupations with no valid answers: first with the mean of the
* 3-digit group, then the 1-digit group, finally the overall mean
generate occ3 = substr(string(occ), 1, 3)
generate occ1 = substr(string(occ), 1, 1)

foreach v of varlist index_reading-index_physical2 {
	bysort occ3: egen `v'_m3 = mean(`v')
	bysort occ1: egen `v'_m1 = mean(`v')
	replace `v' = `v'_m3 if missing(`v')
	replace `v' = `v'_m1 if missing(`v')
	* use r(mean) directly so the value is not round-tripped through a macro
	summarize `v', meanonly
	replace `v' = r(mean) if missing(`v')
	drop `v'_m3 `v'_m1
}

* Standardise all ingredients (mean 0, sd 1), keeping the original names
foreach v of varlist index_reading-index_physical2 {
	egen std_`v' = std(`v')
	drop `v'
	rename std_`v' `v'
}

* Composite indices are simple averages of their standardised ingredients
generate index_social = (index_social1 + index_social2 + index_social3 + index_social4 + index_social5 + index_social6 + index_social7 + index_social8 + index_social9)/9

generate index_physical = (index_physical1 + index_physical2)/2

keep occ index_reading index_cooperate index_interact index_social index_cognitive index_physical

save tabledata1, replace

**------------------------------------------------------------------------------
* Step 1: Generate data
*         Column (1): O*NET counterparts of the CLDS indices
**------------------------------------------------------------------------------
* Every stanza keeps one O*NET element on one scale, averages datavalue within
* O*NET-SOC code, tags the rows through the string variable index and stacks
* them on tabledata2 (long form). The stack is reshaped to wide at the end.
clear
save tabledata2, replace emptyok

* Reading comprehension
import excel "$App_data\ONET N_version\db_25_0_excel\Skills.xlsx", sheet("Skills") firstrow case(lower) clear
	tabulate elementname
	drop if elementname != "Reading Comprehension"
	tabulate scaleid
	drop if scaleid != "IM"
	collapse (mean) datavalue, by(onetsoccode)

	generate index = "reading_onet"
	append using tabledata2
	save tabledata2, replace

* Cooperation
import excel "$App_data\ONET N_version\db_25_0_excel\Work Styles.xlsx", sheet("Work Styles") firstrow case(lower) clear
	tabulate elementname
	drop if elementname != "Cooperation"
	tabulate scaleid
	drop if scaleid != "IM"
	collapse (mean) datavalue, by(onetsoccode)

	generate index = "cooperate"
	append using tabledata2
	save tabledata2, replace

* Interaction, item 1 (Work Context file, scale CX)
import excel "$App_data\ONET N_version\db_25_0_excel\Work Context.xlsx", sheet("Work Context") firstrow case(lower) clear
	tabulate elementname
	drop if elementname != "Contact With Others"
	tabulate scaleid
	drop if scaleid != "CX"
	collapse (mean) datavalue, by(onetsoccode)

	generate index = "interact1"
	append using tabledata2
	save tabledata2, replace

* Interaction, items 2-4 (Work Activities file, scale IM); the element name
* is stored as a provisional label and recoded to interact2-4 below
foreach item in ///
	"Establishing and Maintaining Interpersonal Relationships" ///
	"Communicating with Supervisors, Peers, or Subordinates" ///
	"Communicating with Persons Outside Organization" {

	import excel "$App_data\ONET N_version\db_25_0_excel\Work Activities.xlsx", sheet("Work Activities") firstrow case(lower) clear
	tabulate elementname
	drop if elementname != "`item'"
	tabulate scaleid
	drop if scaleid != "IM"
	collapse (mean) datavalue, by(onetsoccode)

	generate index = "`item'"
	append using tabledata2
	save tabledata2, replace
}

* Cognitive items (Abilities file, scale IM)
foreach item in "Written Comprehension" "Mathematical Reasoning" "Deductive Reasoning" "Inductive Reasoning" {

	import excel "$App_data\ONET N_version\db_25_0_excel\Abilities.xlsx", sheet("Abilities") firstrow case(lower) clear
	keep if elementname == "`item'" & scaleid == "IM"
	collapse (mean) datavalue, by(onetsoccode)

	generate index = "`item'"
	append using tabledata2
	save tabledata2, replace
}

* Physical items (Work Activities file, scale IM)
foreach item in "Performing General Physical Activities" "Handling and Moving Objects" {

	import excel "$App_data\ONET N_version\db_25_0_excel\Work Activities.xlsx", sheet("Work Activities") firstrow case(lower) clear
	keep if elementname == "`item'" & scaleid == "IM"
	collapse (mean) datavalue, by(onetsoccode)

	generate index = "`item'"
	append using tabledata2
	save tabledata2, replace
}

* Replace the provisional element-name labels by short names that are valid
* as variable-name suffixes after the reshape
replace index = "interact2"  if index == "Establishing and Maintaining Interpersonal Relationships"
replace index = "interact3"  if index == "Communicating with Supervisors, Peers, or Subordinates"
replace index = "interact4"  if index == "Communicating with Persons Outside Organization"
replace index = "cognitive1" if index == "Written Comprehension"
replace index = "cognitive2" if index == "Mathematical Reasoning"
replace index = "cognitive3" if index == "Deductive Reasoning"
replace index = "cognitive4" if index == "Inductive Reasoning"
replace index = "physical1"  if index == "Performing General Physical Activities"
replace index = "physical2"  if index == "Handling and Moving Objects"

* One row per O*NET-SOC code, one column per item
reshape wide datavalue, i(onetsoccode) j(index) string
rename datavalue* *
save tabledata2, replace

   
* Carry the O*NET items over to the Chinese 2015 occupation codes
import excel "$Raw_data\occ2015_sem.xls", sheet("2015match") firstrow allstring clear

drop E F G H I J K L M N

* Crosswalk rows without an O*NET match are kept; O*NET codes that do not
* appear in the crosswalk are discarded
rename onet_code onetsoccode
merge m:1 onetsoccode using tabledata2, keep(master match) nogenerate

collapse (mean) cognitive1-reading_onet, by(occ_2015 title_2015_all)

* Fill missing items: 3-digit group mean, then 1-digit group mean, then the
* overall mean of the (partly filled) item
generate grp3 = substr(occ_2015, 1, 3)
generate grp1 = substr(occ_2015, 1, 1)

foreach v of varlist cognitive1-reading_onet {
	bysort grp3: egen m3_`v' = mean(`v')
	bysort grp1: egen m1_`v' = mean(`v')
	replace `v' = m3_`v' if missing(`v')
	replace `v' = m1_`v' if missing(`v')
	summarize `v'
	replace `v' = `r(mean)' if missing(`v')
	drop m3_`v' m1_`v'
}

* Standardise every item
foreach v of varlist cognitive1-reading_onet {
	egen std_`v' = std(`v')
}

* Composite indices, stored directly under their final onet_ names
generate onet_reading   = std_reading_onet
generate onet_cooperate = std_cooperate
generate onet_interact1 = (std_interact1 + std_interact2)/2
generate onet_interact2 = (std_interact3 + std_interact4)/2
generate onet_cognitive = (std_cognitive1 + std_cognitive2 + std_cognitive3 + std_cognitive4)/4
generate onet_physical  = (std_physical2 + std_physical1)/2

* Numeric occupation code, the merge key used against tabledata1
destring occ_2015, generate(occ)
keep occ onet*

save tabledata2, replace


**------------------------------------------------------------------------------
* Step 1: Generate data
*         Column (2): Xiangshuai index
**------------------------------------------------------------------------------
* Builds tabledata3.dta: the five indices of the spreadsheet averaged within
* the occupation label stored in the grouping variable.
import excel "$App_data\香帅的职业数据库\香帅的职业发展指数.xlsx", sheet("Sheet2") firstrow clear

collapse (mean) 创意指数 社交智慧指数 手艺指数 重复性白领指数 重复性蓝领指数, by(职业小类)

save tabledata3, replace


**------------------------------------------------------------------------------
* Step 1: Generate data
*         Column (2): O*NET counterparts of the Xiangshuai indices
**------------------------------------------------------------------------------
* Items are stacked in long form on tabledata4 (one row per O*NET-SOC code and
* item) and reshaped to wide at the end of this section.
*
* Routineness items follow Acemoglu & Autor (2011), "Skills, Tasks and
* Technologies: Implications for Employment and Earnings":
*		Routine cognitive
*			4.C.3.b.7 Importance of repeating the same tasks (work context)
*			4.C.3.b.4 Importance of being exact or accurate (work context)
*			4.C.3.b.8 Structured v. Unstructured work (reverse) (work context)
*		Routine manual
*			4.C.3.d.3 Pace determined by speed of equipment (work context)
*			4.A.3.a.3 Controlling machines and processes (work activity)
*			4.C.2.d.1.i Spend time making repetitive motions (work context)
clear
save tabledata4, replace emptyok

* Work Activities items on scale IM: abstract/analytical, interaction,
* controlling machines and administrative activities. The element name serves
* as provisional label and is recoded further down.
foreach item in ///
	"Interpreting the Meaning of Information for Others" ///
	"Analyzing Data or Information" ///
	"Thinking Creatively" ///
	"Establishing and Maintaining Interpersonal Relationships" ///
	"Resolving Conflicts and Negotiating with Others" ///
	"Guiding, Directing, and Motivating Subordinates" ///
	"Coaching and Developing Others" ///
	"Controlling Machines and Processes" ///
	"Performing Administrative Activities" {

	import excel "$App_data\ONET N_version\db_25_0_excel\Work Activities.xlsx", sheet("Work Activities") firstrow case(lower) clear

	keep if elementname == "`item'" & scaleid == "IM"
	collapse (mean) datavalue, by(onetsoccode)

	generate index = "`item'"
	append using tabledata4
	save tabledata4, replace
}

* Manual, item 1 (Abilities file)
import excel "$App_data\ONET N_version\db_25_0_excel\Abilities.xlsx", sheet("Abilities") firstrow case(lower) clear

	tabulate elementname
	keep if elementname == "Manual Dexterity" & scaleid == "IM"

	collapse (mean) datavalue, by(onetsoccode)
	generate index = "manual1"
	append using tabledata4
	save tabledata4, replace

* Manual, item 2 (Work Activities file)
import excel "$App_data\ONET N_version\db_25_0_excel\Work Activities.xlsx", sheet("Work Activities") firstrow case(lower) clear

	keep if elementname == "Operating Vehicles, Mechanized Devices, or Equipment" & scaleid == "IM"

	collapse (mean) datavalue, by(onetsoccode)
	generate index = "manual2"
	append using tabledata4
	save tabledata4, replace

* Routineness items taken from the Work Context file on scale CX
foreach item in ///
	"Importance of Repeating Same Tasks" ///
	"Importance of Being Exact or Accurate" ///
	"Spend Time Making Repetitive Motions" ///
	"Pace Determined by Speed of Equipment" ///
	"Degree of Automation" {

	import excel "$App_data\ONET N_version\db_25_0_excel\Work Context.xlsx", sheet("Work Context") firstrow case(lower) clear

	keep if elementname == "`item'" & scaleid == "CX"
	collapse (mean) datavalue, by(onetsoccode)

	generate index = "`item'"
	append using tabledata4
	save tabledata4, replace
}

* Structured versus unstructured work enters reversed (6 - value)
import excel "$App_data\ONET N_version\db_25_0_excel\Work Context.xlsx", sheet("Work Context") firstrow case(lower) clear

	keep if elementname == "Structured versus Unstructured Work" & scaleid == "CX"
	replace datavalue = 6 - datavalue
	collapse (mean) datavalue, by(onetsoccode)

	generate index = "Structured versus Unstructured Work"
	append using tabledata4
	save tabledata4, replace

* Check which items were collected
tabulate index

* Short item names that are valid as variable-name suffixes after the reshape
replace index = "Interpret"      if index == "Interpreting the Meaning of Information for Others"
replace index = "Analyze"        if index == "Analyzing Data or Information"
replace index = "Think"          if index == "Thinking Creatively"
replace index = "interact1"      if index == "Establishing and Maintaining Interpersonal Relationships"
replace index = "interact2"      if index == "Resolving Conflicts and Negotiating with Others"
replace index = "interact3"      if index == "Guiding, Directing, and Motivating Subordinates"
replace index = "interact4"      if index == "Coaching and Developing Others"
replace index = "routinecogni1"  if index == "Importance of Repeating Same Tasks"
replace index = "routinecogni2"  if index == "Importance of Being Exact or Accurate"
replace index = "routinecogni3"  if index == "Structured versus Unstructured Work"
replace index = "routinecogni4"  if index == "Performing Administrative Activities"
replace index = "routinecogni5"  if index == "Degree of Automation"
replace index = "routinemanual1" if index == "Pace Determined by Speed of Equipment"
replace index = "routinemanual2" if index == "Controlling Machines and Processes"
replace index = "routinemanual3" if index == "Spend Time Making Repetitive Motions"

* One row per O*NET-SOC code, one column per item
reshape wide datavalue, i(onetsoccode) j(index) string
rename datavalue* *
save tabledata4, replace


* Carry the O*NET items over to the Chinese 2015 occupation codes
import excel "$Raw_data\occ2015_sem.xls", sheet("2015match") firstrow allstring clear

drop E F G H I J K L M N

* Crosswalk rows without an O*NET match are kept; O*NET codes that do not
* appear in the crosswalk are discarded
rename onet_code onetsoccode
merge m:1 onetsoccode using tabledata4, keep(master match) nogenerate

collapse (mean) Analyze-routinemanual3, by(occ_2015 title_2015_all)

* Fill missing items: 3-digit group mean, then 1-digit group mean, then the
* overall mean of the (partly filled) item
generate grp3 = substr(occ_2015, 1, 3)
generate grp1 = substr(occ_2015, 1, 1)

foreach v of varlist Analyze-routinemanual3 {
	bysort grp3: egen m3_`v' = mean(`v')
	bysort grp1: egen m1_`v' = mean(`v')
	replace `v' = m3_`v' if missing(`v')
	replace `v' = m1_`v' if missing(`v')
	summarize `v'
	replace `v' = `r(mean)' if missing(`v')
	drop m3_`v' m1_`v'
}

* Standardise every item
foreach v of varlist Analyze-routinemanual3 {
	egen std_`v' = std(`v')
}

* Composite indices: simple averages of the standardised items
generate index_abstract         = (std_Analyze + std_Interpret + std_Think)/3
generate index_interact         = (std_interact1 + std_interact2 + std_interact3 + std_interact4)/4
generate index_manual           = (std_manual1 + std_manual2)/2
generate index_routinemanual    = (std_routinemanual1 + std_routinemanual2 + std_routinemanual3)/3
generate index_routinecognitive = (std_routinecogni1 + std_routinecogni2 + std_routinecogni3 + std_routinecogni4 + std_routinecogni5)/5

* The occupation title becomes the merge key used against tabledata3
rename title_2015_all 职业小类
keep 职业小类 index* occ_2015

save tabledata4, replace


**------------------------------------------------------------------------------
* Step 1: Generate data
*         Column (3): Chat-GPT ratings
**------------------------------------------------------------------------------
* Builds tabledata5.dta: standardised Chat-GPT ratings combined into NL_*
* indices, one row per occupation label.
import excel "$App_data\chatcheck.xlsx", sheet("Sheet2-整理") firstrow clear

* Short names for the question columns (imported names are truncated headers)
rename (HowimportantisREADINGCOMPREH HowimportantisESTABLISHINGAN HowimportantisCommunicatingw E HowimportantisContactWithOt) ///
	(reading social1 social2 social3 social4)
rename (HowimportantisInterpretingth HowimportantisAnalyzingData HowimportantisThinkingCreati) ///
	(cognitive1 cognitive2 cognitive3)
rename (HowimportantisManualDexterit HowimportantisOperatingVehic) ///
	(manual1 manual2)
rename (Howimportantisrepeatingthes Howimportantisbeingexactor Towhatextentisthisjobstruc) ///
	(routinecog1 routinecog2 routinecog3)
rename (Howimportantisittothisoccu HowimportantisControllingmac Howmuchdoesthisjobrequirem) ///
	(routinemanu1 routinemanu2 routinemanu3)
rename (Howimportantisperformingphys Howimportantisusinghandsand) ///
	(physical1 physical2)
rename 职业 职业小类

* The structured-work question enters reversed (6 - value)
replace routinecog3 = 6 - routinecog3

* Standardise every rating, keeping the short names
foreach v of varlist reading-physical2 {
	egen z_`v' = std(`v')
	drop `v'
	rename z_`v' `v'
}

* Composite indices: simple averages of the standardised ratings
generate NL_reading          = reading
generate NL_physical         = (physical1 + physical2)/2
generate NL_manual           = (manual1 + manual2)/2
generate NL_routinemanual    = (routinemanu1 + routinemanu2 + routinemanu3)/3
generate NL_routinecognitive = (routinecog1 + routinecog2 + routinecog3)/3
generate NL_cognitive        = (cognitive1 + cognitive2 + cognitive3)/3
generate NL_social1          = (social1 + social4)/2
generate NL_social2          = (social2 + social3)/2

keep 职业小类 NL*
save tabledata5, replace


**------------------------------------------------------------------------------
* Step 1: Generate data
*         Column (3): O*NET counterparts of the Chat-GPT indices
**------------------------------------------------------------------------------
* Items are stacked in long form on tabledata6 (one row per O*NET-SOC code and
* item) and reshaped to wide at the end of this section. Every stanza keeps
* one element on one scale and averages datavalue within O*NET-SOC code; the
* element name serves as provisional label and is recoded further down.
*
* Changes relative to the earlier version of this section:
*   - "Degree of Automation" is no longer imported: it only produced
*     routinecogni5, which the composites built from tabledata6 never use.
*   - The recode of "Performing Administrative Activities" was removed; that
*     element is never collected in this section.
*   - The tabulate diagnostics were dropped; the same files and elements are
*     already tabulated in the CLDS O*NET section of this do-file.
clear
save tabledata6, replace emptyok

* Work Activities items on scale IM: interaction 2-4, abstract/analytical,
* physical, and controlling machines
foreach item in ///
	"Establishing and Maintaining Interpersonal Relationships" ///
	"Communicating with Supervisors, Peers, or Subordinates" ///
	"Communicating with Persons Outside Organization" ///
	"Interpreting the Meaning of Information for Others" ///
	"Analyzing Data or Information" ///
	"Thinking Creatively" ///
	"Performing General Physical Activities" ///
	"Handling and Moving Objects" ///
	"Controlling Machines and Processes" {

	import excel "$App_data\ONET N_version\db_25_0_excel\Work Activities.xlsx", sheet("Work Activities") firstrow case(lower) clear

	keep if elementname == "`item'" & scaleid == "IM"
	collapse (mean) datavalue, by(onetsoccode)

	generate index = "`item'"
	append using tabledata6
	save tabledata6, replace
}

* Work Context items on scale CX: interaction 1 and the routineness items
foreach item in ///
	"Contact With Others" ///
	"Importance of Repeating Same Tasks" ///
	"Importance of Being Exact or Accurate" ///
	"Spend Time Making Repetitive Motions" ///
	"Pace Determined by Speed of Equipment" {

	import excel "$App_data\ONET N_version\db_25_0_excel\Work Context.xlsx", sheet("Work Context") firstrow case(lower) clear

	keep if elementname == "`item'" & scaleid == "CX"
	collapse (mean) datavalue, by(onetsoccode)

	generate index = "`item'"
	append using tabledata6
	save tabledata6, replace
}

* Structured versus unstructured work enters reversed (6 - value)
import excel "$App_data\ONET N_version\db_25_0_excel\Work Context.xlsx", sheet("Work Context") firstrow case(lower) clear

	keep if elementname == "Structured versus Unstructured Work" & scaleid == "CX"
	replace datavalue = 6 - datavalue
	collapse (mean) datavalue, by(onetsoccode)

	generate index = "Structured versus Unstructured Work"
	append using tabledata6
	save tabledata6, replace

* Reading comprehension (Skills file, scale IM)
import excel "$App_data\ONET N_version\db_25_0_excel\Skills.xlsx", sheet("Skills") firstrow case(lower) clear

	keep if elementname == "Reading Comprehension" & scaleid == "IM"
	collapse (mean) datavalue, by(onetsoccode)

	generate index = "reading_onet"
	append using tabledata6
	save tabledata6, replace

* Short item names that are valid as variable-name suffixes after the reshape.
* The alphabetical extremes (Analyze ... routinemanual3) are relied upon by
* the variable ranges used when this file is merged to the crosswalk.
replace index = "interact1"      if index == "Contact With Others"
replace index = "interact2"      if index == "Establishing and Maintaining Interpersonal Relationships"
replace index = "interact3"      if index == "Communicating with Supervisors, Peers, or Subordinates"
replace index = "interact4"      if index == "Communicating with Persons Outside Organization"
replace index = "Interpret"      if index == "Interpreting the Meaning of Information for Others"
replace index = "Analyze"        if index == "Analyzing Data or Information"
replace index = "Think"          if index == "Thinking Creatively"
replace index = "physical1"      if index == "Performing General Physical Activities"
replace index = "physical2"      if index == "Handling and Moving Objects"
replace index = "routinecogni1"  if index == "Importance of Repeating Same Tasks"
replace index = "routinecogni2"  if index == "Importance of Being Exact or Accurate"
replace index = "routinecogni3"  if index == "Structured versus Unstructured Work"
replace index = "routinemanual1" if index == "Pace Determined by Speed of Equipment"
replace index = "routinemanual2" if index == "Controlling Machines and Processes"
replace index = "routinemanual3" if index == "Spend Time Making Repetitive Motions"

* One row per O*NET-SOC code, one column per item
reshape wide datavalue, i(onetsoccode) j(index) string
rename datavalue* *
save tabledata6, replace
   

* Carry the O*NET items over to the Chinese 2015 occupation codes
import excel "$Raw_data\occ2015_sem.xls", sheet("2015match") firstrow allstring clear

drop E F G H I J K L M N

* Crosswalk rows without an O*NET match are kept; O*NET codes that do not
* appear in the crosswalk are discarded
rename onet_code onetsoccode
merge m:1 onetsoccode using tabledata6, keep(master match) nogenerate

collapse (mean) Analyze-routinemanual3, by(occ_2015 title_2015_all)

* Fill missing items: 3-digit group mean, then 1-digit group mean, then the
* overall mean of the (partly filled) item
generate grp3 = substr(occ_2015, 1, 3)
generate grp1 = substr(occ_2015, 1, 1)

foreach v of varlist Analyze-routinemanual3 {
	bysort grp3: egen m3_`v' = mean(`v')
	bysort grp1: egen m1_`v' = mean(`v')
	replace `v' = m3_`v' if missing(`v')
	replace `v' = m1_`v' if missing(`v')
	summarize `v'
	replace `v' = `r(mean)' if missing(`v')
	drop m3_`v' m1_`v'
}

* Standardise every item
foreach v of varlist Analyze-routinemanual3 {
	egen std_`v' = std(`v')
}

* Composite indices, stored directly under their final onet_ names
generate onet_reading          = std_reading_onet
generate onet_interact1        = (std_interact1 + std_interact2)/2
generate onet_interact2        = (std_interact3 + std_interact4)/2
generate onet_cognitive        = (std_Analyze + std_Interpret + std_Think)/3
generate onet_physical         = (std_physical2 + std_physical1)/2
generate onet_routinecognitive = (std_routinecogni1 + std_routinecogni2 + std_routinecogni3)/3
generate onet_routinemanual    = (std_routinemanual1 + std_routinemanual2 + std_routinemanual3)/3

* The occupation title becomes the merge key used against tabledata5
rename title_2015_all 职业小类
keep 职业小类 onet* occ_2015
save tabledata6, replace

**------------------------------------------------------------------------------
* Step 2: Display data
**------------------------------------------------------------------------------
* Spearman rank correlations are computed on 3-digit occupation means. The
* results are written into the first rows of the collapsed data, which are
* then kept as the result rows and accumulated in table.dta.

******* Column 1: CLDS indices against their O*NET counterparts
use tabledata1, clear
merge 1:1 occ using tabledata2, keep(match) nogenerate

generate occ_3dig = substr(string(occ), 1, 3)

collapse (mean) onet* index*, by(occ_3dig)

generate rho = .
generate p = .
generate index = ""

* Each entry: first variable, second variable, row label
local k = 0
foreach spec in ///
	"index_reading onet_reading reading" ///
	"index_cognitive onet_cognitive cognitive" ///
	"index_physical onet_physical physical" ///
	"index_social onet_interact2 social" {

	tokenize `spec'
	local ++k
	spearman `1' `2'
	replace rho = `r(rho)' in `k'
	replace p = `r(p)' in `k'
	replace index = "`3'" in `k'
}

keep rho p index
keep in 1/4
generate sample = "clds"
save table, replace

******* Column 2: spreadsheet indices of tabledata3 against the O*NET
*       composites of tabledata4, on 3-digit occupation means
use tabledata3, clear
merge 1:1 职业小类 using tabledata4, keep(match) nogenerate

generate occ_3dig = substr(occ_2015, 1, 3)

collapse (mean) 创意指数 社交智慧指数 手艺指数 重复性白领指数 重复性蓝领指数 index*, by(occ_3dig)

generate rho = .
generate p = .
generate index = ""

* Each entry: first variable, second variable, row label
local k = 0
foreach spec in ///
	"创意指数 index_abstract cognitive" ///
	"社交智慧指数 index_interact social" ///
	"手艺指数 index_manual physical" ///
	"重复性蓝领指数 index_routinemanual routinemanual" ///
	"重复性白领指数 index_routinecognitive routinecognitive" {

	tokenize `spec'
	local ++k
	spearman `1' `2'
	replace rho = `r(rho)' in `k'
	replace p = `r(p)' in `k'
	replace index = "`3'" in `k'
}

* Keep the five result rows and stack them on the rows collected so far
keep rho p index
keep in 1/5
generate sample = "tangya"
append using table
save table, replace

******* Column 3: Chat-GPT indices against their O*NET counterparts, on
*       3-digit occupation means
use tabledata5, clear
merge 1:1 职业小类 using tabledata6, keep(match) nogenerate

generate occ_3dig = substr(occ_2015, 1, 3)

collapse (mean) NL* onet*, by(occ_3dig)

generate rho = .
generate p = .
generate index = ""

* Each entry: first variable, second variable, row label
local k = 0
foreach spec in ///
	"NL_cognitive onet_cognitive cognitive" ///
	"NL_social2 onet_interact2 social" ///
	"NL_physical onet_physical physical" ///
	"NL_routinemanual onet_routinemanual routinemanual" ///
	"NL_routinecognitive onet_routinecognitive routinecognitive" ///
	"NL_reading onet_reading reading" {

	tokenize `spec'
	local ++k
	spearman `1' `2'
	replace rho = `r(rho)' in `k'
	replace p = `r(p)' in `k'
	replace index = "`3'" in `k'
}

* Keep the six result rows and stack them on the rows collected so far
keep rho p index
keep in 1/6
generate sample = "gpt"
append using table
save table, replace

* Significance stars: * p<=0.1, ** p<=0.05, *** p<=0.01 (none if p is missing)
generate star = cond(p <= 0.01, "***", cond(p <= 0.05, "**", cond(p <= 0.1, "*", "")))
drop p

* One row per index, one rho/star pair per sample
reshape wide rho star, i(index) j(sample) string
format rho* %9.4f

order index rhoclds starclds rhotangya startangya rhogpt stargpt
* sortobs is a user-written command; it puts the rows in the listed order
sortobs index, values("cognitive" "physical" "social" "reading" "routinemanual" "routinecognitive") 

list

**------------------------------------------------------------------------------
* Step 3: Plot data
**------------------------------------------------------------------------------
* Figures for CLDS: scatter of 3-digit occupation means with a linear fit.
* Each graph is saved in Stata format and exported as PNG.
use tabledata1, clear
merge 1:1 occ using tabledata2, keep(match) nogenerate

generate occ_3dig = substr(string(occ), 1, 3)

collapse (mean) onet* index*, by(occ_3dig)

* Each entry: y variable, x variable, x title, y title, file-name suffix
foreach spec in ///
	"index_reading onet_reading 阅读能力-本文 阅读能力-CLDS 6a" ///
	"index_cognitive onet_cognitive 认知能力-本文 认知能力-CLDS 2a" ///
	"index_physical onet_physical 体力能力-本文 体力能力-CLDS 1a" ///
	"index_social onet_interact2 社交能力-本文 社交能力-CLDS 3a" {

	tokenize `spec'
	twoway (scatter `1' `2') ///
		(lfit `1' `2', lpattern(solid)), ///
		legend(off) scheme(plotplain) xtitle("`3'") ytitle("`4'")
	graph save "$Out_lab\appII_Fig2-`5'", replace
	graph export "$Out_lab\appII_Fig2-`5'.png", replace
}



* Figures for the tangya sample: spreadsheet indices of tabledata3 against the
* O*NET composites of tabledata4, on 3-digit occupation means
use tabledata3, clear
merge 1:1 职业小类 using tabledata4, keep(match) nogenerate

generate occ_3dig = substr(occ_2015, 1, 3)

collapse (mean) 创意指数 社交智慧指数 手艺指数 重复性白领指数 重复性蓝领指数 index*, by(occ_3dig)

* Then draw the scatter plots (y axis labelled from 0 to 100 in steps of 20).
* Each entry: y variable, x variable, x title, y title, file-name suffix
foreach spec in ///
	"创意指数 index_abstract 认知能力-本文 认知能力-职业发展数据库 2b" ///
	"社交智慧指数 index_interact 社交能力-本文 社交能力-职业发展数据库 3b" ///
	"手艺指数 index_manual 体力能力-本文 体力能力-职业发展数据库 1b" ///
	"重复性蓝领指数 index_routinemanual 重复性体力-本文 重复性体力-职业发展数据库 4a" ///
	"重复性白领指数 index_routinecognitive 重复性认知-本文 重复性认知-职业发展数据库 5a" {

	tokenize `spec'
	twoway (scatter `1' `2') ///
		(lfit `1' `2', lpattern(solid)), ///
		legend(off) scheme(plotplain) xtitle("`3'") ytitle("`4'") ///
		ylabel(0(20)100)
	graph save "$Out_lab\appII_Fig2-`5'", replace
	graph export "$Out_lab\appII_Fig2-`5'.png", replace
}




* Figures for Chat-GPT: NL_* indices against their O*NET counterparts, on
* 3-digit occupation means
use tabledata5, clear
merge 1:1 职业小类 using tabledata6, keep(match) nogenerate

generate occ_3dig = substr(occ_2015, 1, 3)

collapse (mean) NL* onet*, by(occ_3dig)

* Each entry: y variable, x variable, x title, y title, file-name suffix
foreach spec in ///
	"NL_reading onet_reading 阅读能力-本文 阅读能力-CHAT-GPT 6b" ///
	"NL_social2 onet_interact2 社交能力-本文 社交能力-CHAT-GPT 3c" ///
	"NL_cognitive onet_cognitive 认知能力-本文 认知能力-CHAT-GPT 2c" ///
	"NL_physical onet_physical 体力能力-本文 体力能力-CHAT-GPT 1c" ///
	"NL_routinemanual onet_routinemanual 重复性体力-本文 重复性体力-CHAT-GPT 4b" ///
	"NL_routinecognitive onet_routinecognitive 重复性认知-本文 重复性认知-CHAT-GPT 5b" {

	tokenize `spec'
	twoway (scatter `1' `2') ///
		(lfit `1' `2', lpattern(solid)), ///
		legend(off) scheme(plotplain) xtitle("`3'") ytitle("`4'")
	graph save "$Out_lab\appII_Fig2-`5'", replace
	graph export "$Out_lab\appII_Fig2-`5'.png", replace
}

* Remove the intermediate datasets written to the working directory
erase table.dta
forvalues i = 1/6 {
	erase tabledata`i'.dta
}

log close

